From: Jan Beulich Date: Fri, 30 Sep 2022 13:16:22 +0000 (+0200) Subject: x86/NUMA: improve memnode_shift calculation for multi node system X-Git-Tag: archive/raspbian/4.17.0-1+rpi1^2~33^2~163 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https:/%22bookmarks:///%22http:/www.example.com/cgi/%22https:/%22bookmarks:/?a=commitdiff_plain;h=1666086b00442b23e4fd70f4971e3bcf1a16b124;p=xen.git x86/NUMA: improve memnode_shift calculation for multi node system SRAT may describe individual nodes using multiple ranges. When they're adjacent (with or without a gap in between), only the start of the first such range actually needs accounting for. Furthermore the very first range doesn't need considering of its start address at all, as it's fine to associate all lower addresses (with no memory) with that same node. For this to work, the array of ranges needs to be sorted by address - adjust logic accordingly in acpi_numa_memory_affinity_init(). Signed-off-by: Jan Beulich Acked-by: Roger Pau Monné --- diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c index 4f742414b0..2c3c1c15fe 100644 --- a/xen/arch/x86/numa.c +++ b/xen/arch/x86/numa.c @@ -127,7 +127,8 @@ static int __init extract_lsb_from_nodes(const struct node *nodes, epdx = paddr_to_pdx(nodes[i].end - 1) + 1; if ( spdx >= epdx ) continue; - bitfield |= spdx; + if ( i && (!nodeids || nodeids[i - 1] != nodeids[i]) ) + bitfield |= spdx; if ( !i || !nodeids || nodeids[i - 1] != nodeids[i] ) nodes_used++; if ( epdx > memtop ) diff --git a/xen/arch/x86/srat.c b/xen/arch/x86/srat.c index b62a152911..fbcd8749c4 100644 --- a/xen/arch/x86/srat.c +++ b/xen/arch/x86/srat.c @@ -312,6 +312,7 @@ acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma) unsigned pxm; nodeid_t node; unsigned int i; + bool next = false; if (srat_disabled()) return; @@ -413,14 +414,37 @@ acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma) node, pxm, start, end - 1, ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ? " (hotplug)" : ""); - node_memblk_range[num_node_memblks].start = start; - node_memblk_range[num_node_memblks].end = end; - memblk_nodeid[num_node_memblks] = node; + /* Keep node_memblk_range[] sorted by address. */ + for (i = 0; i < num_node_memblks; ++i) + if (node_memblk_range[i].start > start || + (node_memblk_range[i].start == start && + node_memblk_range[i].end > end)) + break; + + memmove(&node_memblk_range[i + 1], &node_memblk_range[i], + (num_node_memblks - i) * sizeof(*node_memblk_range)); + node_memblk_range[i].start = start; + node_memblk_range[i].end = end; + + memmove(&memblk_nodeid[i + 1], &memblk_nodeid[i], + (num_node_memblks - i) * sizeof(*memblk_nodeid)); + memblk_nodeid[i] = node; + if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { - __set_bit(num_node_memblks, memblk_hotplug); + next = true; if (end > mem_hotplug) mem_hotplug = end; } + for (; i <= num_node_memblks; ++i) { + bool prev = next; + + next = test_bit(i, memblk_hotplug); + if (prev) + __set_bit(i, memblk_hotplug); + else + __clear_bit(i, memblk_hotplug); + } + num_node_memblks++; }